<?php
require_once('simple_html_dom.php');

// Baidu search URL whose query decodes to "inurl:question.php?qid". It ends
// with an open "pn=" parameter that the loop below completes with a number
// (presumably the result offset -- confirm against Baidu's paging scheme).
$burl='http://www.baidu.com/s?wd=inurl%3Aquestion.php%3Fqid&pn=';

// Visit pn = 0, 10, 20, ... 740, announcing each page URL before parsing it.
$pn=0;
while($pn<=740){
	echo 'parse ', $burl, $pn, "\n";
	parseurl($burl.$pn);
	$pn+=10;
}


/**
 * Parses one search-result page and appends every site found to sitelist.txt.
 *
 * For each element matching 'table  font[color=#008000]', the text in front
 * of the first "/<b>" is taken as the site address, prefixed with "http://",
 * and passed to getsite(). One line "<site url>\t<id>\t<charset>\n" is then
 * appended to sitelist.txt in the current working directory.
 *
 * A page that cannot be loaded is reported with a warning and skipped instead
 * of aborting the whole run.
 *
 * @param string $url URL of the search-result page to parse.
 * @return void
 */
function parseurl($url){
	$html = file_get_html($url);
	if(!$html){
		// Depending on the simple_html_dom version, a failed or empty fetch
		// yields false; calling find() on that would be a fatal error.
		trigger_error('parseurl: unable to load '.$url, E_USER_WARNING);
		return;
	}

	foreach($html->find('table  font[color=#008000]') as $item){
		$text=$item->innertext;
		$endpos=strpos($text,'/<b>');
		if(false===$endpos) continue; // marker missing: nothing to extract
		// Use a separate local so the $url parameter is not overwritten.
		$host=substr($text,0,$endpos);
		if(empty($host)) continue;

		$site_url='http://'.$host;
		$sitearray=getsite($site_url);
		$line="{$site_url}\t{$sitearray[0]}\t{$sitearray[1]}\n";

		// Append under an exclusive lock and surface write failures rather
		// than ignoring the fopen/fwrite results.
		if(false===file_put_contents('sitelist.txt',$line,FILE_APPEND|LOCK_EX)){
			trigger_error('parseurl: unable to write sitelist.txt',E_USER_WARNING);
		}
	}
	$html->clear();
}


/**
 * Loads a page and extracts an id from its browse.php link plus a charset tag.
 *
 * Charset tag: 'gbk' when the content attribute of the first
 * <meta http-equiv> element ends in "gbk" (case-insensitive); every other
 * case, including a missing meta element, yields 'utf'.
 * NOTE(review): only the first http-equiv meta is inspected, so a page whose
 * first such tag is not the Content-Type one is reported as 'utf'.
 *
 * Id: the trimmed text after the first '=' in the first link whose href
 * contains "browse.php?" and whose value is numerically greater than zero.
 * Candidates that are not positive are ignored rather than returned.
 *
 * A page that cannot be loaded yields array(0, 'utf').
 *
 * @param string $site_url Absolute URL of the page to inspect.
 * @return array Two elements: [0] the id (0 when none was found),
 *               [1] the string 'gbk' or 'utf'.
 */
function getsite($site_url){
	$cid=0;
	$charset='utf';

	$html = file_get_html($site_url);
	if(!$html){
		// Depending on the simple_html_dom version, a failed or empty fetch
		// yields false; fall back to the defaults instead of crashing.
		return array($cid,$charset);
	}

	// The meta element may be absent (null) and its content attribute may be
	// missing, so guard before reading and force a string for substr().
	$meta=$html->find('meta[http-equiv]',0);
	if($meta && 'gbk'===strtolower((string)substr((string)$meta->content,-3))){
		$charset='gbk';
	}

	foreach($html->find('a') as $item){
		$href=(string)$item->href;
		if(false===strpos($href,'browse.php?')) continue;

		$endpos=strpos($href,'=');
		if(false===$endpos) continue; // no value part; false+1 would slice from index 1

		$candidate=trim(substr($href,$endpos+1));
		// The integer cast gives the same leading-number test on PHP 7 and 8;
		// a bare `$candidate>0` treats non-numeric strings differently there.
		if((int)$candidate>0){
			$cid=$candidate;
			break;
		}
	}
	$html->clear();
	return array($cid,$charset);
}


?>